import os
import numpy as np
import pandas as pd
import argparse
import pickle
from datetime import datetime
from utils import *
from matplotlib import pyplot as plt
import multiprocessing
from joblib import Parallel, delayed
from utils import *
from matplotlib import pyplot as plt
# Use matplotlib's Agg backend, which is non-interactive and renders to
# image buffers instead of opening GUI windows.
plt.switch_backend('Agg')
def str2bool(v):
    """Interpret a command-line style value as a boolean.

    Args:
        v: The value to interpret. Usually the raw string argparse hands to
            a ``type=`` callable, but an actual ``bool`` is accepted too and
            returned unchanged.

    Returns:
        ``True`` if ``v`` is ``True`` or, ignoring case and surrounding
        whitespace, one of ``"yes"``, ``"true"``, ``"t"``, ``"1"``;
        ``False`` for anything else.
    """
    # A real bool has no .lower(); pass it through instead of raising.
    if isinstance(v, bool):
        return v
    return str(v).strip().lower() in ("yes", "true", "t", "1")

parser = argparse.ArgumentParser(description="Simulation configuration")
# Command-line parameters of the simulation.
# NOTE(review): --seed is parsed, but the RNG seeding visible in this file
# uses a fixed 2024 rather than this value — confirm whether that is intended.
parser.add_argument("--seed", type=int, default=2016, help="Random seed")
parser.add_argument("--simu_times", type=int, default=100,
                    help="Number of simulation iterations")
parser.add_argument("--heavy_tail_h", type=str2bool, default=False)
parser.add_argument("--heavy_tail_e", type=str2bool, default=False)
parser.add_argument("--multi", type=int, default=1, help="Multiplier for historical client sample size")
parser.add_argument("--ATEB", type=float, default=1.0, help="ATEcontrol")
parser.add_argument("--typeR", type=int, default=1, help="Reward type")
parser.add_argument("--mu_diff", type=float, required=True, help="Difference parameter")
# The help text here used to duplicate the --mu_diff description.
parser.add_argument("--df", type=float, required=True,
                    help="Value passed to data_gen as its df argument")
args = parser.parse_args()
# Variable parameters (unpacked from the parsed command line)
seed = args.seed
heavy_tail_h = bool(args.heavy_tail_h)
heavy_tail_e = bool(args.heavy_tail_e)
simu_times = args.simu_times
multi = args.multi
ATEB = args.ATEB
typeR = args.typeR
mu_diff = args.mu_diff
df = args.df
# The results directory encodes ATEB, typeR, df and both heavy-tail flags.
output_path = f'output/b_{ATEB}/typeR_{typeR}/df_{df}/heavy_tail_e{heavy_tail_e}_heavy_tail_h{heavy_tail_h}'
# exist_ok=True already makes this a no-op when the directory exists, so a
# separate os.path.exists() check is unnecessary (and would only add a
# check-then-create window when several runs start at once).
os.makedirs(output_path, exist_ok=True)
# Fixed settings that are not exposed on the command line. Each name is
# defined exactly once; the values are the ones the script ended up with
# when several of them were assigned twice.

# Not referenced by the code visible in this file.
prob_behavior = None
p_s = 0
Q_indicator = 'Linear'
method = 'Simple'
delta = 1

# Passed as the leading positional arguments of data_gen(...) and to
# combine(...): n for the first dataset, n0 for the second, nn for both.
n = 48
n0 = 48
nn = 1

# Keyword arguments forwarded to combine(...).
ratio_indicator = 'Given'
lr = 0.005
epochs = 5000
hidden_size = 5

# Seed NumPy's global RNG for the parent process.
np.random.seed(2024)

# Leave four cores free, but always use at least one worker.
num_cores = max(1, multiprocessing.cpu_count() - 4)
def run_single_simulation(sim_index):
    """Run one simulation replication and return its ATE estimates.

    NumPy's global RNG is reseeded with ``2024 + sim_index`` before any data
    is generated, so the random draws depend on the replication index.

    Args:
        sim_index: Integer index of the replication; added to the base seed.

    Returns:
        A list holding the values produced by ``combine`` followed by the
        empirical ATE computed from the two 50000-sample datasets, or
        ``None`` if any step raises (the error is printed).
    """
    try:
        np.random.seed(2024 + sim_index)

        # Two large samples whose mean 'reward' difference serves as the
        # empirical reference ATE.
        reference_1 = data_gen(50000, nn, 1, d=0, b=ATEB, mu_diff=0,
                               typ=typeR, heavy_tail=heavy_tail_e, df=df)
        reference_0 = data_gen(50000, nn, 0, d=0, b=0.0, mu_diff=0,
                               typ=typeR, heavy_tail=heavy_tail_e, df=df)
        mean_reward_1 = np.mean(reference_1['reward'])
        mean_reward_0 = np.mean(reference_0['reward'])
        empirical_ate = mean_reward_1 - mean_reward_0

        # Small samples used for estimation: size n with d=0, and size n0
        # with d=1 and the mu_diff shift.
        sample_d = data_gen(n, nn, None, d=0, b=ATEB, mu_diff=0,
                            typ=typeR, heavy_tail=heavy_tail_e, df=df)
        sample_h = data_gen(n0, nn, 0, d=1, b=ATEB, mu_diff=mu_diff,
                            typ=typeR, heavy_tail=heavy_tail_h, df=df)

        # Each estimator call gets its own copy of the data.
        est_1 = Q_eta_est_backward(sample_d.copy(), 1, None)
        est_0_d = Q_eta_est_backward(sample_d.copy(), 0, None)
        est_0_h = Q_eta_est_backward(sample_h.copy(), 0, None)

        estimates = combine(
            est_1, est_0_d, est_0_h, n, n0, nn,
            ratio_indicator=ratio_indicator,
            lr=lr,
            epochs=epochs,
            hidden_size=hidden_size,
        )

        return [*estimates, empirical_ate]

    except Exception as err:
        print(f"❌ an error occurs: {err}")
        return None
# Run all replications through joblib, using num_cores workers.
results = Parallel(n_jobs=num_cores)(
    delayed(run_single_simulation)(i) for i in range(simu_times)
)
# run_single_simulation returns None when a replication fails. Drop those
# entries so a single failure does not break DataFrame construction and
# discard every successful replication.
successful_results = [row for row in results if row is not None]
failed_count = len(results) - len(successful_results)
if failed_count:
    print(f"{failed_count} of {len(results)} simulations failed and were excluded from the output")
ATEs_df = pd.DataFrame(successful_results, columns=['EDO', 'HDB', 'CWE',
                                                     'Non-pessmistic', "Proposed", 'LASSO',
                                                     'MVE',
                                                     'TRUE'])
# One CSV per mu_diff value inside the configuration-specific directory.
csv_path = f"{output_path}/mu_diff_{mu_diff:.6f}.csv"
ATEs_df.to_csv(csv_path, index=False)
